This is an R Markdown Notebook. Each section of the code is then explained.

First of all import the libraries needed

#install.packages(c("datavolley", "ovlytics"))
library(datavolley)
library(ggplot2)
library(dplyr)
library(ovlytics)

Import the file you are interested in

#filename <- "C:/Users/mirko/OneDrive - Politecnico di Milano/Altro/Volley/Conco2324/Parella Torino/Ritorno/&09_VOL-LIB_Libellula_b1fa_23_s.dvw"
d <- dir("C:/Users/mirko/OneDrive - Politecnico di Milano/Altro/Volley/Conco2324/Parella Torino/Ritorno/", pattern = "dvw$", full.names = TRUE)
lx <- list()
## read each file
for (fi in seq_along(d)) lx[[fi]] <- dv_read(d[fi], insert_technical_timeouts = FALSE)
## now extract the play-by-play component from each and bind them together
px <- list()
for (fi in seq_along(lx)) px[[fi]] <- plays(lx[[fi]])
#plays(lx[[1]])
px <- do.call(rbind, px)

Now let's focus on the attack rate. Change the `teamName` variable to the team you want to analyse in the match you are interested in.

attack_rate <- px %>% dplyr::filter(skill == "Attack", team == teamName) %>%
  group_by(team, start_zone) %>% dplyr::summarize(n_attacks = n()) %>%
  mutate(rate = n_attacks/sum(n_attacks)) %>% ungroup
`summarise()` has grouped output by 'team'. You can override using the `.groups` argument.
## add x, y coordinates associated with the zones
attack_rate <- cbind(attack_rate, dv_xy(attack_rate$start_zone, end = "lower"))

## for team 2, these need to be on the top half of the diagram
tm2i <- attack_rate$team == teams(px)[2]
attack_rate[tm2i, c("x", "y")] <- dv_flip_xy(attack_rate[tm2i, c("x", "y")])

ggplot(attack_rate, aes(x, y, fill = rate)) + geom_tile() + ggcourt(labels = teams(px)) +
    scale_fill_gradient2(name = "Attack rate")

Now let’s plot the heatmap of where attacks ended

If you want a specific vector plot of attack directions:

playerName = 'ALESSIA DAMATO'
## first tabulate attacks by starting and ending zone
attack_rate <- px %>% dplyr::filter(team == teams(px)[2] & skill == "Attack" & team == teamName & player_name == playerName & start_zone == 4) %>%
  group_by(start_zone, end_zone) %>% tally() %>% ungroup

## convert counts to rates
attack_rate$rate <- attack_rate$n/sum(attack_rate$n)

## discard zones with zero attacks or missing location information
attack_rate <- attack_rate %>% dplyr::filter(rate>0 & !is.na(start_zone) & !is.na(end_zone))

## add starting x, y coordinates
attack_rate <- cbind(attack_rate, dv_xy(attack_rate$start_zone, end = "lower", xynames = c("sx", "sy")))

## and ending x, y coordinates
attack_rate <- cbind(attack_rate, dv_xy(attack_rate$end_zone, end = "upper", xynames = c("ex", "ey")))

## plot in reverse order so largest arrows are on the bottom
attack_rate <- attack_rate %>% dplyr::arrange(desc(rate))

p <- ggplot(attack_rate, aes(x, y, col = rate)) + ggcourt(labels = c(teams(px)[2], ""), court_colour = "indoor")
for (n in 1:nrow(attack_rate))
    p <- p + geom_path(data = data.frame(x = c(attack_rate$sx[n], attack_rate$ex[n]),
                                         y = c(attack_rate$sy[n], attack_rate$ey[n]),
                                         rate = attack_rate$rate[n]),
                       aes(size = rate), lineend = "round",
                       arrow = arrow(length = unit(2, "mm"), type = "closed", angle = 20, ends = "last"))
p + scale_colour_gradient(name = "Attack rate") + guides(size = "none")

Reception plots

px %>% dplyr::filter(skill == "Reception", team == teamName, end_zone == 5) %>% group_by(player_name) %>% 
  dplyr::summarize(N_receptions = n(), error_rate = sum(evaluation_code == "=",na.rm = TRUE))

Sankey Diagram for Reception

This diagram is useful to detect the direction the other team suffers more.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

---
title: "Volleyball Data Analysis"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. Each section of the code is then explained.

First of all import the libraries needed

```{r}
#install.packages(c("datavolley", "ovlytics"))
library(datavolley)
library(ggplot2)
library(dplyr)
library(ovlytics)
```

Import the file you are interested in

```{r}
## Single-file alternative, kept for reference:
#filename <- "C:/Users/mirko/OneDrive - Politecnico di Milano/Altro/Volley/Conco2324/Parella Torino/Ritorno/&09_VOL-LIB_Libellula_b1fa_23_s.dvw"
## List every `.dvw` file in the match folder (full paths).
## The dot is escaped so only a real ".dvw" extension matches (the old pattern
## "dvw$" also accepted any name merely ending in "dvw"), and case is ignored
## so files saved as ".DVW" are picked up too.
d <- dir("C:/Users/mirko/OneDrive - Politecnico di Milano/Altro/Volley/Conco2324/Parella Torino/Ritorno/",
         pattern = "\\.dvw$", ignore.case = TRUE, full.names = TRUE)
```

```{r}
## parse every file listed in `d`, without inserting technical timeouts
lx <- lapply(d, dv_read, insert_technical_timeouts = FALSE)
## pull the play-by-play component out of each parsed file ...
px <- lapply(lx, plays)
#plays(lx[[1]])
## ... and stack them into a single table
px <- do.call(rbind, px)
```

Now let's focus on the attack rate. Change the `teamName` variable to the team you want to analyse in the match you are interested in.

```{r}
## Team to analyse; compared with `==` against `px$team`, so it must match exactly.
teamName <- "VOLLEY PARELLA TORINO"
## Count the team's attacks per start zone, then express each count as a
## share of that team's attacks.
attack_rate <- px %>%
  dplyr::filter(skill == "Attack", team == teamName) %>%
  group_by(team, start_zone) %>%
  ## "drop_last" keeps the per-team grouping that the rate below relies on and
  ## avoids the "summarise() has grouped output by 'team'" message
  dplyr::summarize(n_attacks = n(), .groups = "drop_last") %>%
  mutate(rate = n_attacks / sum(n_attacks)) %>%
  ungroup()
```

```{r}
## look up the court coordinates of each start zone (lower half of the court)
zone_xy <- dv_xy(attack_rate$start_zone, end = "lower")
attack_rate <- cbind(attack_rate, zone_xy)

## rows belonging to the second team are flipped onto the top half of the diagram
tm2i <- attack_rate$team == teams(px)[2]
xy_cols <- c("x", "y")
attack_rate[tm2i, xy_cols] <- dv_flip_xy(attack_rate[tm2i, xy_cols])

## tile heatmap of the attack rate, with the court added after the tiles
zone_heatmap <- ggplot(attack_rate, aes(x, y, fill = rate)) + geom_tile()
zone_heatmap + ggcourt(labels = teams(px)) + scale_fill_gradient2(name = "Attack rate")
```

```{r}
## Same heatmap, assembled so the tiles sit between the court background
## and the court lines/labels.
court_bg <- ggcourt(court_colour = "indoor", background_only = TRUE)
court_fg <- ggcourt(labels = teams(px), court_colour = "indoor", foreground_only = TRUE)
ggplot(attack_rate, aes(x, y, fill = rate)) +
  court_bg +       ## background court colour only
  geom_tile() +    ## the heatmap itself
  court_fg +       ## grid lines and labels
  scale_fill_gradient2(name = "Attack rate")
```

Now let's plot the heatmap of where attacks ended

```{r}
## Player to analyse; compared with `==` against `px$player_name`.
playerName <- "ALESSIA DAMATO"
## count the selected player's attacks from start zone 4 by the zone where they ended
attack_rate <- px %>%
  dplyr::filter(team == teamName & skill == "Attack" & player_name == playerName & start_zone == 4) %>%
  group_by(end_zone) %>%
  dplyr::summarize(n_attacks = n()) %>%
  mutate(rate = n_attacks / sum(n_attacks)) %>%
  ungroup()
## add the x, y coordinates of each end zone
attack_rate <- cbind(attack_rate, dv_xy(attack_rate$end_zone, end = "lower"))
## tiles are filled and labelled with the attack COUNT, so the legend is titled
## accordingly (it previously read "Rate" while showing counts)
ggplot(attack_rate, aes(x, y, fill = n_attacks)) + geom_tile() + ggcourt("lower", labels = NULL) +
  scale_fill_gradient2(name = "Count: attack\nend location") +
  geom_text(aes(label = n_attacks), color = "black")
```

```{r}
## Attacks from start zone 4 by every team other than `teamName`, counted per
## end zone and end subzone (rows without a subzone are dropped).
## NOTE(review): `rate` is normalised within each end zone (shares of the
## subzones inside a zone), not over all attacks - confirm this is intended.
## NOTE(review): no plot in this chunk uses the result - confirm whether a
## heatmap was meant to follow.
attack_rate <- px %>%
  dplyr::filter(team != teamName & skill == "Attack" & start_zone == 4 & !is.na(end_subzone)) %>%
  group_by(end_zone, end_subzone) %>%
  ## explicit "drop_last": keeps the per-end-zone grouping used by the rate and
  ## avoids the "summarise() has grouped output" message
  dplyr::summarize(n_attacks = n(), .groups = "drop_last") %>%
  mutate(rate = n_attacks / sum(n_attacks)) %>%
  ungroup()
```

If you want a specific vector plot of attack directions:

```{r}
## Player whose attack directions are drawn.
playerName <- "ALESSIA DAMATO"
## first tabulate the player's attacks from zone 4 by starting and ending zone.
## Only `team == teamName` is required: the extra `team == teams(px)[2]`
## condition emptied the table whenever the chosen team was not listed second.
attack_rate <- px %>%
  dplyr::filter(skill == "Attack" & team == teamName & player_name == playerName & start_zone == 4) %>%
  group_by(start_zone, end_zone) %>%
  tally() %>%
  ungroup()

## convert counts to rates
attack_rate$rate <- attack_rate$n / sum(attack_rate$n)

## discard zones with zero attacks or missing location information
attack_rate <- attack_rate %>% dplyr::filter(rate > 0 & !is.na(start_zone) & !is.na(end_zone))

## add starting x, y coordinates (lower half of the court)
attack_rate <- cbind(attack_rate, dv_xy(attack_rate$start_zone, end = "lower", xynames = c("sx", "sy")))

## and ending x, y coordinates (upper half of the court)
attack_rate <- cbind(attack_rate, dv_xy(attack_rate$end_zone, end = "upper", xynames = c("ex", "ey")))

## plot in reverse order so largest arrows are on the bottom
attack_rate <- attack_rate %>% dplyr::arrange(desc(rate))

## the attacking team is the one that was filtered on, so label the lower half with it
p <- ggplot(attack_rate, aes(x, y, col = rate)) + ggcourt(labels = c(teamName, ""), court_colour = "indoor")
## one arrow per start/end zone pair; seq_len() makes this a no-op when no
## attacks matched (1:nrow() would have iterated over c(1, 0))
for (n in seq_len(nrow(attack_rate))) {
  p <- p + geom_path(data = data.frame(x = c(attack_rate$sx[n], attack_rate$ex[n]),
                                       y = c(attack_rate$sy[n], attack_rate$ey[n]),
                                       rate = attack_rate$rate[n]),
                     aes(size = rate), lineend = "round",
                     arrow = arrow(length = unit(2, "mm"), type = "closed", angle = 20, ends = "last"))
}
p + scale_colour_gradient(name = "Attack rate") + guides(size = "none")
```

## Reception plots

```{r}
## Receptions by `teamName` that ended in zone 5, summarised per player:
## number of receptions, number of errors (evaluation code "=") and error rate.
px %>%
  dplyr::filter(skill == "Reception", team == teamName, end_zone == 5) %>%
  group_by(player_name) %>%
  dplyr::summarize(
    N_receptions = n(),
    n_errors = sum(evaluation_code == "=", na.rm = TRUE),
    ## an actual rate (errors / receptions); this column used to hold the raw count
    error_rate = n_errors / N_receptions
  )
```

```{r}
## receptions by `teamName` that ended in zone 5
zone5_receptions <- px %>%
  dplyr::filter(skill == "Reception", team == teamName, end_zone == 5)

## Per-player tally of each evaluation code ("#", "+", "!", "-", "="), plus
## positività  = ("#" + "+") / receptions
## efficienza  = ("#" + "+" - "=") / receptions
table_data <- zone5_receptions %>%
  group_by(player_name) %>%
  dplyr::summarise(
    N_receptions = n(),
    count_perfette = sum(evaluation_code == "#", na.rm = TRUE),
    count_positive = sum(evaluation_code == "+", na.rm = TRUE),
    count_escalamative = sum(evaluation_code == "!", na.rm = TRUE),
    count_negative = sum(evaluation_code == "-", na.rm = TRUE),
    count_errori = sum(evaluation_code == "=", na.rm = TRUE),
    positività = (count_positive + count_perfette) / N_receptions,
    efficienza = (count_positive + count_perfette - count_errori) / N_receptions
  )

table_data
```

```{r}
## Bar chart of positività per player (bar height = value in `table_data`)
ggplot(table_data, aes(x = player_name, y = positività)) +
  geom_col(fill = "blue", color = "black") +
  labs(
    title = "Bar Plot of Positività",
    x = "Giocatori",
    y = "Positività"
  )

## Bar chart of efficienza per player
ggplot(table_data, aes(x = player_name, y = efficienza)) +
  geom_col(fill = "blue", color = "black") +
  labs(
    title = "Efficienza",
    x = "Giocatori",
    y = "Efficienza"
  )
```

```{r}

## Reception error rate of each player of `teamName` in each end zone.
## One row per team / player / end zone / evaluation code; `rate` is the same
## on every row of a player-zone group: errors ("=") divided by all receptions
## of that player in that zone.
reception_rate <- px %>%
  dplyr::filter(skill == "Reception", team == teamName) %>%
  ## `team` is part of the grouping so the column survives for the flip below
  group_by(team, player_name, end_zone, evaluation_code) %>%
  dplyr::summarize(n_reception = n(), .groups = "drop_last") %>%
  ## still grouped by team/player/zone here. The old expression divided the
  ## logical `evaluation_code == "="` by the total, giving 1/total or 0 rather
  ## than an error rate. `%in%` treats a missing code as "not an error".
  mutate(rate = sum(n_reception[evaluation_code %in% "="]) / sum(n_reception)) %>%
  ungroup()

## add x, y coordinates associated with the zones
reception_rate <- cbind(reception_rate, dv_xy(reception_rate$end_zone, end = "lower"))

## for team 2, these need to be on the top half of the diagram
tm2i <- reception_rate$team == teams(px)[2]
reception_rate[tm2i, c("x", "y")] <- dv_flip_xy(reception_rate[tm2i, c("x", "y")])

ggplot(reception_rate, aes(x, y, fill = rate)) + geom_tile() + ggcourt(labels = teams(px)) +
    scale_fill_gradient2(name = "Reception rate")

# Find the player with the maximum rate for each zone
max_rate_players <- reception_rate %>%
  group_by(end_zone) %>%
  slice(which.max(rate)) %>%
  ungroup()

# Plot the heatmap, labelling each zone with its max-rate player (only where the rate is above zero)
ggplot(reception_rate, aes(x, y, fill = rate)) +
  geom_tile() +
  geom_text(data = max_rate_players[max_rate_players$rate > 0, ], aes(label = paste(player_name, ":\n", round(rate * 100), "%")), color = "black", size = 3) +
  ggcourt(labels = teams(px)) +
  scale_fill_gradient2(name = "Reception rate")
```

## Sankey Diagram for Reception

This diagram is useful for spotting the directions that cause the other team the most trouble in reception.

```{r}
#install.packages("plotly")
## plot_ly() comes from the plotly package, which this notebook never attaches
## with library(); it is therefore called through its namespace below.

## Count receptions evaluated "=" or "-" for every start zone / end zone pair
## that has both zones recorded.
## NOTE(review): no team filter is applied, so both teams' receptions are
## counted - confirm whether only the opponent (team != teamName) was intended.
sankey_data <- px %>%
  dplyr::filter(!is.na(start_zone) & !is.na(end_zone) & skill == "Reception" &
                  evaluation_code %in% c("=", "-")) %>%
  dplyr::count(start_zone, end_zone, name = "value")

## Every zone that appears as a start or an end becomes one node; plotly link
## indices are 0-based, hence the "- 1".
zone_nodes <- unique(c(sankey_data$start_zone, sankey_data$end_zone))

# Create Sankey diagram
plotly::plot_ly(
  type = "sankey",
  orientation = "h",
  node = list(
    label = zone_nodes
  ),
  link = list(
    source = match(sankey_data$start_zone, zone_nodes) - 1,
    target = match(sankey_data$end_zone, zone_nodes) - 1,
    value = sankey_data$value
  )
)


```

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.
